library(tidyverse) # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openintro) # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(gplots) # for col2hex() function
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer) # for color palettes
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
library(gifski) # need the library for creating gifs but don't need to load each time
library(shiny) # for creating interactive apps
library(lubridate) # for date manipulation
library(ggthemes) # for even more plotting themes
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
theme_set(theme_minimal())
AAPL <- read_csv("AAPL.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
AAPL <- AAPL%>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Technology",
Company = "AAPL") %>%
mutate(return = (Close - 75.0875)/ 75.0875 * 100)
HON <- read_csv("HON.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
HON <- HON %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Industrials",
Company = "HON") %>%
mutate(return = (Close - 180.79)/ 180.79 * 100)
UNP <- read_csv("UNP.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
UNP <- UNP %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Industrials",
Company = "UNP") %>%
mutate(return = (Close - 182.27)/ 182.27 * 100)
UPS <- read_csv("UPS.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
UPS <- UPS %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Industrials",
Company = "UPS") %>%
mutate(return = (Close - 116.79)/ 116.79 * 100)
JNJ <- read_csv("JNJ.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
JNJ <- JNJ %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Health care",
Company = "JNJ") %>%
mutate(return = (Close - 145.97)/ 145.97 * 100)
UNH <- read_csv("UNH.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
UNH <- UNH %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Health care",
Company = "UNH") %>%
mutate(return = (Close - 292.50)/ 292.50 * 100)
RHHBY<- read_csv("RHHBY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
RHHBY <- RHHBY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Health care",
Company = "RHHBY") %>%
mutate(return = (Close - 40.89)/ 40.89 * 100)
MSFT<- read_csv("MSFT.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
MSFT <- MSFT %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Technology",
Company = "MSFT") %>%
mutate(return = (Close - 160.62 )/ 160.62 * 100)
NVDA<- read_csv("NVDA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
NVDA <- NVDA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Technology",
Company = "NVDA") %>%
mutate(return = (Close - 239.91)/ 239.91 * 100)
GOOGL<- read_csv("GOOGL.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
GOOGL <- GOOGL %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Services",
Company = "GOOGL") %>%
mutate(return = (Close - 1368.68 )/ 1368.68 * 100)
TCEHY<- read_csv("TCEHY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
TCEHY <- TCEHY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Services",
Company = "TCEHY") %>%
mutate(return = (Close - 49.88)/ 49.88 * 100)
FB<- read_csv("FB.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
FB <- FB %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Services",
Company = "FB") %>%
mutate(return = (Close - 209.78)/ 209.78 * 100)
NEE<- read_csv("NEE.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
NEE <- NEE %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Utilities",
Company = "NEE") %>%
mutate(return = (Close - 59.6550)/ 59.6550 * 100)
ENLAY<- read_csv("ENLAY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
ENLAY <- ENLAY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Utilities",
Company = "ENLAY") %>%
mutate(return = (Close - 8.05)/ 8.05 * 100)
IBDSF<- read_csv("IBDSF.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
IBDSF <- IBDSF %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Utilities",
Company = "IBDSF") %>%
mutate(return = (Close - 10.23)/ 10.23 * 100)
BRKA<- read_csv("BRK-A.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
BRKA <- BRKA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Financials",
Company = "BRKA") %>%
mutate(return = (Close - 342261)/ 342261 * 100)
JPM<- read_csv("JPM.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
JPM <- JPM %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Financials",
Company = "JPM") %>%
mutate(return = (Close - 141.09)/ 141.09 * 100)
V<- read_csv("V.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
V <- V %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Financials",
Company = "V") %>%
mutate(return = (Close - 191.12)/ 191.12 * 100)
AMZN<- read_csv("AMZN.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
AMZN <- AMZN %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Discretionary",
Company = "AMZN") %>%
mutate(return = (Close - 1898.01)/ 1898.01 * 100)
TSLA<- read_csv("TSLA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
TSLA <- TSLA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Discretionary",
Company = "TSLA") %>%
mutate(return = (Close - 86.052)/ 86.052 * 100)
BABA<- read_csv("BABA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
BABA <- BABA %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Communication Discretionary",
Company = "BABA") %>%
mutate(return = (Close - 219.77 )/ 219.77 * 100)
WMT<- read_csv("WMT.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
WMT <- WMT %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Consumer Staples",
Company = "WMT") %>%
mutate(return = (Close - 118.94)/ 118.94 * 100)
NSRGF<- read_csv("NSRGF.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
NSRGF <- NSRGF %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Consumer Staples",
Company = "NSRGF") %>%
mutate(return = (Close - 108.06 )/ 108.06 * 100)
PG<- read_csv("PG.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
PG <- PG %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "Consumer Staples",
Company = "PG") %>%
mutate(return = (Close - 123.41 )/ 123.41 * 100)
SPY<- read_csv("SPY.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
SPY <- SPY %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector = "All field",
Company = "SPY") %>%
mutate(return = (Close - 324.87 )/ 324.87 * 100)
XOM <- read_csv("XOM.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
XOM <- XOM %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Energy",
Company= "XOM") %>%
mutate(return = (Close - 70.90)/ 70.90 * 100)
CVX <- read_csv("CVX.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
CVX <- CVX %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Energy",
Company= "CVX") %>%
mutate(return = (Close - 121.43)/ 121.43 * 100)
RDS_A <-read_csv("RDS-A.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
RDS_A <- RDS_A %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Energy",
Company= "RDS_A")%>%
mutate(return = (Close - 59.74)/ 59.74 * 100)
energy <- rbind(XOM,CVX,RDS_A)
AMT <- read_csv("AMT.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
AMT <- AMT %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Real Estate",
Company= "AMT")%>%
mutate(return = (Close - 228.50)/ 228.50 * 100)
KE <- read_csv("KE.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
KE <- KE %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Real Estate",
Company= "KE")%>%
mutate(return = (Close - 17.690)/ 17.690 * 100)
PLD <- read_csv("PLD.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
PLD <- PLD %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Real Estate",
Company= "PLD")%>%
mutate(return = (Close - 88.40 )/ 88.40 * 100)
real_estate <- rbind(AMT,KE,PLD)
BHP <-read_csv("BHP.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
BHP <- BHP%>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Material",
Company= "RDS_A") %>%
mutate(return = (Close - 54.92)/ 54.92 * 100)
LIN <-read_csv("LIN.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
LIN <- LIN %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Material",
Company= "LIN") %>%
mutate(return = (Close - 210.74)/ 210.74 * 100)
RIO <-read_csv("RIO.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
RIO <- RIO %>%
select(-Open, -High, -Low, -`Adj Close`) %>%
mutate(Sector="Material",
Company= "RIO")%>%
mutate(return = (Close - 59.89)/ 59.89 * 100)
material <- rbind(BHP,LIN,RIO)
communication_services <- rbind(GOOGL, TCEHY, FB)
utilities <- rbind(NEE, ENLAY, IBDSF)
financials <- rbind(JPM, BRKA, V)
communication_discretionary <- rbind(AMZN, TSLA, BABA)
consumer_staple <- rbind(WMT, NSRGF, PG)
tech <- rbind(AAPL,MSFT,NVDA)
industrials <- rbind(HON,UNP,UPS)
health <- rbind(JNJ,UNH,RHHBY)
sp <- rbind(communication_services, utilities, financials, communication_discretionary, consumer_staple,
tech, industrials, health, real_estate, material, energy, SPY)
tech %>%
ggplot(aes(y = Close, x = Date, color = Company)) +
geom_line()+
geom_text(aes(label = Company))+
labs(subtitle = "Date:{frame_along}")+
transition_reveal(Date)+
theme(legend.position = 0)
anim_save("tech.gif")
knitr::include_graphics("tech.gif")

health %>%
ggplot(aes(y = Close, x = Date, color = Company)) +
geom_line()+
geom_text(aes(label = Company))+
labs(subtitle = "Date:{frame_along}")+
transition_reveal(Date)+
theme(legend.position = 0)
anim_save("health.gif")
knitr::include_graphics("health.gif")

industrials %>%
ggplot(aes(y = Close, x = Date, color = Company)) +
geom_line()+
geom_text(aes(label = Company))+
labs(subtitle = "Date:{frame_along}")+
transition_reveal(Date)+
theme(legend.position = 0)
anim_save("industrials.gif")
knitr::include_graphics("industrials.gif")

sp %>%
group_by(Sector,Company) %>%
filter(Sector == "Technology") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

sp %>%
filter(Sector == "Industrials") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

sp %>%
filter(Sector == "Health care") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

sp %>%
filter(Sector == "Communication Services") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

sp %>%
filter(Sector == "Utilities") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

sp %>%
filter(Sector == "Financials") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

sp %>%
filter(Sector == "All field") %>%
ggplot(aes(x = Date, y = return, color = Company))+
geom_line()

return_type <- sp %>%
filter(Sector!="All field") %>%
select(-Close, -Volume) %>%
group_by(Date, Sector) %>%
mutate(avg_return = mean(return),
type_return = ifelse(avg_return<0, "negative", "positive"))%>%
ungroup() %>%
group_by(Sector) %>%
mutate(`Proportion negative` = mean(type_return == "negative"),
`Proportion positive` = mean(type_return == "positive")) %>%
pivot_longer(cols = `Proportion negative`:`Proportion positive`,
names_to = "prop_type",
values_to = "prop") %>%
summarize(Sector, prop_type, prop) %>%
distinct(Sector, prop_type, prop)
## `summarise()` regrouping output by 'Sector' (override with `.groups` argument)
return_type %>%
ggplot(aes(x = prop, y = prop_type, fill = prop_type )) +
geom_col()+
geom_text(aes(label = round(prop,digits = 2)), size = 2)+
facet_wrap(~Sector)+
labs(title = "Proportions of negative and positive daily average returns per sector",
fill = "Type of return",
x = "",
y = "")+
theme(legend.position = 0,
panel.grid.major.y = element_blank(),
panel.grid.minor.y=element_blank())

volume <- sp %>%
filter(Sector!="All field") %>%
select(-Close, -return) %>%
mutate(month = month(Date,label = TRUE),
year = year(Date)) %>%
group_by(Sector, month, year) %>%
summarize(`Total volume` = sum(Volume)) %>%
ungroup() %>%
mutate(date = ymd(paste(year,month,"01"))) %>%
mutate(Sector = fct_reorder(Sector, `Total volume`, median, .desc = TRUE)) %>%
ggplot(aes(x = date, y = `Total volume`, fill = Sector))+
geom_col()+
labs(title = "Monthly amount of transactions per sector last year",
y = "",
x = "")+
scale_x_date(date_labels = "%b", date_breaks = "3 months")+
scale_y_continuous(labels = scales::comma)+
theme(legend.position = "none",
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y =element_blank())+
facet_wrap(~Sector)
## `summarise()` regrouping output by 'Sector', 'month' (override with `.groups` argument)
ggplotly(volume,
tooltip = c("y"))